home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
vol_200
/
270_01
/
diff.c
< prev
next >
Wrap
Text File
|
1980-01-01
|
10KB
|
448 lines
/**
DIFF.C
File differencing program.
The algorithm used here is the "recursive longest matching
sequence" algorithm discussed in the 10/87 issue if DDJ.
Some code is borrowed from the accompanying article.
**/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "diff.h"
static char szBuffer[MAXLINE] ;
/**
Hash()
Calculate a hash value for a string. This value can be used
for a first level equality check for strings, thus eliminating
a time consuming strcmp() for each comparison.
The hash value is a variation on the standard checksum hash, which
includes the length of the string in the high word of the int.
**/
static unsigned Hash( szPtr )
char *szPtr ;
{
unsigned chksum ;
char *s ;
/*--- Compute checksum of string ---*/
for ( chksum = 0, s = szPtr ; *s ; chksum ^= *s++ )
;
/*--- return combined 7-bit checksum and length ---*/
return ( (chksum & 0xFF) | ((s - szPtr) << 8) ) ;
}
/**
ReadFile()
Read a file into an array of LINEs.
**/
static LINE *ReadFile( pFile, nLines )
FILE *pFile ;
int *nLines ;
{
LINE *pLine ;
int nSize ;
/*--- allocate one page of lines ---*/
pLine = (LINE *)calloc( PAGESIZE, sizeof(LINE *) ) ;
*nLines = 0 ;
while (pLine != NULL)
{
pLine[*nLines].addr = ftell( pFile ) ;
fgets( szBuffer, MAXLINE, pFile ) ;
if ( feof(pFile) )
break ;
pLine[*nLines].hash = Hash(szBuffer) ;
pLine[*nLines].linenum = *nLines + 1;
++(*nLines) ;
/*--- check to see if page filled ---*/
if ( *nLines % PAGESIZE == 0 )
{
nSize = (*nLines + PAGESIZE) * sizeof(LINE *) ;
pLine = (LINE *)realloc(pLine, nSize) ;
}
}
return pLine ;
}
/**
CheckHashes()
Check to see if the next N hash codes match. This function
assumes that the hash codes for the first compare match.
Returns:
TRUE if N or greater hash codes match
FALSE if less than N hash codes match
**/
static BOOL CheckHashes( pLine1, pLine2, nCount )
LINE *pLine1, /* Starting line in file 1 */
*pLine2 ; /* starting line in file 2 */
int nCount ; /* minimum number of codes to match */
{
int i ;
for ( i = 0 ; i < nCount && (++pLine1)->hash == (++pLine2)->hash ; i++ )
;
return ( i < nCount ) ? FALSE : TRUE ;
}
/**
CheckStrings()
This routine actually compares the strings associated with the
specified file lines.
Returns:
A count of how many consecutive strings match
**/
static int CheckStrings( pFile1, pLine1, pFile2, pLine2, nMost )
FILE *pFile1 ; /* file 1 stream */
LINE *pLine1 ; /* line in file 1 */
FILE *pFile2 ; /* file 2 stream */
LINE *pLine2 ; /* line in file 2 */
int nMost ; /* most possible matches */
{
static char szBuf1[MAXLINE], szBuf2[MAXLINE] ;
int i ;
/*--- seek to starting locations for strings ---*/
fseek( pFile1, pLine1->addr, SEEK_SET ) ;
fseek( pFile2, pLine2->addr, SEEK_SET ) ;
for ( i = 0 ; i < nMost ; i ++ )
{
fgets( szBuf1, MAXLINE, pFile1 ) ;
fgets( szBuf2, MAXLINE, pFile2 ) ;
if ( strcmp(szBuf1, szBuf2) )
break ;
}
return i ;
}
/**
PrintLines()
Print a range of lines form a file
**/
static void PrintLines( pFile1, pLine1, nLines1, pFile2, pLine2, nLines2 )
FILE *pFile1 ;
LINE *pLine1 ;
int nLines1 ;
FILE *pFile2 ;
LINE *pLine2 ;
int nLines2 ;
{
BOOL bSeparator = FALSE ;
int nType ;
if ( nLines1 > 0 && nLines2 > 0 )
nType = CHANGE ;
else if ( nLines1 > 0 )
nType = DELETE ;
else if ( nLines2 > 0 )
nType = ADD ;
if ( nLines1 > 0 )
fseek( pFile1, pLine1->addr, SEEK_SET ) ;
if ( nLines2 > 0 )
fseek( pFile2, pLine2->addr, SEEK_SET ) ;
switch( nType )
{
case CHANGE: /* change a range of lines */
if ( nLines1 > 1 )
printf( "%d,%d", pLine1->linenum,
pLine1->linenum+nLines1-1 ) ;
else
printf( "%d", pLine1->linenum ) ;
if ( nLines2 > 1 )
printf( "c%d,%d\n", pLine2->linenum,
pLine2->linenum+nLines2-1 ) ;
else
printf( "c%d\n", pLine2->linenum ) ;
bSeparator = TRUE ;
break ;
case DELETE:
if ( nLines1 > 1 )
printf( "%d,%d", pLine1->linenum,
pLine1->linenum+nLines1-1 ) ;
else
printf( "%d", pLine1->linenum ) ;
printf( "d%d\n", pLine2->linenum-1 ) ;
break ;
case ADD:
printf( "%d", pLine1->linenum-1 ) ;
if ( nLines2 > 1 )
printf( "a%d,%d\n", pLine2->linenum,
pLine2->linenum+nLines2-1 ) ;
else
printf( "a%d\n", pLine2->linenum ) ;
break ;
}
/*--- print the line range(s) ---*/
while ( nLines1-- > 0 )
{
fgets( szBuffer, MAXLINE, pFile1 ) ;
printf( "< %s", szBuffer ) ;
}
if ( bSeparator )
printf("---\n" ) ;
while ( nLines2-- > 0 )
{
fgets( szBuffer, MAXLINE, pFile2 ) ;
printf( "> %s", szBuffer ) ;
}
}
/**
DoDiff()
Difference two files using "recursive longest matching sequence"
algorithm.
**/
static void DoDiff( pFile1, pLine1, nMost1, pFile2, pLine2, nMost2, nLong )
FILE *pFile1 ; /* file 1 stream */
LINE *pLine1 ; /* start line in file 1 */
int nMost1 ; /* Max lines in file 1 */
FILE *pFile2 ; /* file 2 stream */
LINE *pLine2 ; /* start line in file 1 */
int nMost2 ; /* Max lines in file 1 */
int nLong ; /* 'long enough' match value */
{
int nMostSoFar = 0, /* longest sequence so far */
nLongest = 0, /* longest possible sequence */
nLines = 0, /* Lines in current match */
nLongStart1 = 0, /* start of longest sequence in file 1 */
nLongStart2 = 0, /* start of longest sequence in file 2 */
nLong1 = 0, /* length of longest possible in file 1 */
nLong2 = 0, /* length of longest possible in file 2 */
nStart1 = 0, /* starting line for recursion in file 1 */
nStart2 = 0 ; /* starting line for recursion in file 2 */
int i, j ;
for ( i = 0 ; i < nMost1 ; i ++ )
{
for ( j = 0 ; j < nMost2 ; j ++ )
{
if ( pLine1[i].hash == pLine2[j].hash )
if ( CheckHashes( pLine1+i, pLine2+j,
nMostSoFar) )
{
/* count this sequence */
nLongest = min( nMost1 -i, nMost2 -j );
nLines = CheckStrings( pFile1, pLine1+i,
pFile2, pLine2+j,
nLongest ) ;
if ( nLines > nMostSoFar )
{
nLongStart1 = i ;
nLongStart2 = j ;
nMostSoFar = nLines ;
}
if ( nMostSoFar > nLong )
{
/*
recurse using all lines
below the matching set
of lines.
*/
nLong1 = nLongStart1 ;
nLong2 = nLongStart2 ;
if ( nLong1 > 0 && nLong2 > 0 )
DoDiff( pFile1,
pLine1,
nLong1,
pFile2,
pLine2,
nLong2,
nLong );
else if ( nLong1>0 || nLong2>0)
PrintLines( pFile1,
pLine1,
nLong1,
pFile2,
pLine2,
nLong2 ) ;
/*
recurse using all lines
above the matching set
of lines.
*/
nStart1=nLongStart1+nMostSoFar;
nStart2=nLongStart2+nMostSoFar;
nLong1=nMost1-nStart1;
nLong2=nMost2-nStart2;
if ( nLong1 > 0 && nLong2 > 0 )
DoDiff( pFile1,
pLine1+nStart1,
nLong1,
pFile2,
pLine2+nStart2,
nLong2,
nLong );
else if ( nLong1>0 || nLong2>0)
PrintLines( pFile1,
pLine1+nStart1,
nLong1,
pFile2,
pLine2+nStart2,
nLong2 ) ;
return ;
}
}
}
}
/*
At this point, we have searched the entire range and
have the longest range of matches specified as starting
at line nLongestStartX and nMostSoFar lines long.
*/
if ( nMostSoFar > 0 ) /* any matches? */
{
/*
recurse using all lines
below the matching set
of lines.
*/
nLong1 = nLongStart1 ;
nLong2 = nLongStart2 ;
if ( nLong1 > 0 && nLong2 > 0 )
DoDiff( pFile1, pLine1, nLong1,
pFile2, pLine2, nLong2, nLong );
else if ( nLong1 > 0 || nLong2 > 0)
PrintLines( pFile1, pLine1, nLong1,
pFile2, pLine2,